# coding=UTF-8
import re
import urllib
import time


# Fetch the HTML source of a web page
def gethtml(url):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.

    Returns:
        The body exactly as read from the response object (a byte string;
        ``str`` on Python 2, ``bytes`` on Python 3). No decoding is done.

    Raises:
        IOError: propagated from ``urlopen`` when the resource cannot be
            opened (Python 3's ``URLError`` is an ``OSError`` subclass).
    """
    from contextlib import closing

    # Resolve the opener locally so the function works on both major
    # Python versions; Python 2 keeps using the original urllib.urlopen.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # closing() releases the connection even if read() raises; the
    # Python 2 response object is not a context manager on its own.
    with closing(urlopen(url)) as page:
        return page.read()


# Define the matching rule, then try to download every match
def getImg(html):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute.

    The matched URLs are printed, then each one is saved in the current
    working directory as ``1.jpg``, ``2.jpg``, ... in order of appearance.
    A failed download is reported, still consumes its number, and is
    followed by a 3 second pause before the next attempt.

    Args:
        html: Page source to scan. A Python 3 ``bytes`` object is decoded
            as UTF-8 (undecodable bytes replaced) before matching.

    Returns:
        None.
    """
    # Resolve the downloader locally so the function works on both major
    # Python versions; Python 2 keeps using the original urllib.urlretrieve.
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    # On Python 2 ``bytes is str``, so this branch only runs on Python 3,
    # where a str pattern cannot be applied to a bytes subject.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    # [^"\n] keeps the capture inside one attribute value. The previous
    # '.*?' could run past the closing quote of a non-jpg src and swallow
    # everything up to the next '.jpg"', yielding a bogus URL.
    imglist = re.findall(r'src="([^"\n]*\.jpg)"', html)
    print(imglist)

    for x, imgurl in enumerate(imglist, 1):
        try:
            print('尝试抓取第%s个' % x)
            print(imgurl)
            urlretrieve(imgurl, '%s.jpg' % x)
        except (IOError, ValueError):
            # Python 3 raises ValueError for a URL without a scheme, where
            # Python 2 raised IOError; treat both as a failed download so
            # one bad URL does not abort the remaining ones.
            print("网络连接出错，第%s个抓取失败" % x)
            time.sleep(3)
if __name__ == "__main__":
    # Script entry point: fetch the forum thread and save every .jpg it
    # references into the current working directory.
    getImg(gethtml("https://tieba.baidu.com/p/5157150520?fid=608"))
# For large-scale crawling, a dedicated crawling framework is still the better choice.

# Installation guide: http://jingyan.baidu.com/article/14bd256e748346bb6d2612be.html

